*** Merge info on requests to be transferred to a specific canton ***

* start from an empty session
clear all
set matsize 800

* add file path
* both globals are left empty here and must be filled in before running;
* dat is the root of every data path below, do is not used in this part
global dat ""
global do ""

* max VARNAME
* Replaces VARNAME by its maximum within each id group: the original variable
* is renamed with a trailing underscore, the group maximum is created under
* the original name with egen, and the renamed copy is dropped.
* Expects a variable called id in memory; the data end up sorted by id.
capture program drop max
program define max
	args v
	rename `v' `v'_
	bysort id: egen `v' = max(`v'_)
	drop `v'_
end

*************************
*** clean request data **
*************************

* load the raw request records (space-delimited text, ISO-8859-1 encoded)
import delimited "$dat/canton_assignment/comments.txt", delimiter(space) encoding(ISO-8859-1) clear

* remove rows that are identical on every variable
duplicates drop

* person id as string; shorter names for requested canton and canton
tostring pers_id, replace
rename (ktv_pz_wunsch_kt_kz ktv_kt_kz) (wunschkanton cant)
replace wunschkanton = trim(wunschkanton)

*date
* ktv_d is parsed as day-month-year; values that are 7 characters long get a
* leading zero first (presumably the zero of the day was lost on import - confirm)
tostring ktv_d, replace
replace ktv_d = "0" + ktv_d if length(ktv_d) == 7
generate date_request = date(ktv_d, "DMY")
generate m_request = mofd(date_request)
generate year_request = year(date_request)
format date_request %td
format m_request %tm
drop ktv_d


* recode to numeric
* the text values TRUE and FALSE become 1 and 0, then destring converts the flag
local flags nowunsch anytext variable_canton_match dublin comment_canton_match comment_deutschspr comment_franzspr zentrale comment_any_canton
foreach flag of local flags {
	replace `flag' = "1" if `flag' == "TRUE"
	replace `flag' = "0" if `flag' == "FALSE"
	destring `flag', replace
}

* correct 15 obs with error in coding
* a comment match cannot stand when there is no comment text, and a variable
* match cannot stand when the requested canton is coded as !
tabulate comment_canton_match anytext // 15 obs coded as match but anytext=0 and wunschkanton==!
tabulate wunschkanton if variable_canton_match == 1, missing
replace comment_canton_match = 0 if !missing(comment_canton_match) & anytext == 0
replace variable_canton_match = 0 if !missing(variable_canton_match) & wunschkanton == "!"


*duplicates
* Records that agree on person, request date, canton and requested canton can
* still differ on the flags harmonised below (anytext, zentrale,
* comment_canton_match, dublin, comment_any_canton, comment_franzspr).
* If at least one copy is TRUE, all copies are set to TRUE so that the
* duplicates drop further down collapses them into one record.
* NOTE(review): comment_deutschspr, nowunsch and variable_canton_match are not
* harmonised here; copies that differ on those would remain - confirm intended.

duplicates tag pers_id date_request cant wunschkanton, generate(dup)
tabulate dup // 98 obs with 1 dup

foreach x in anytext zentrale comment_canton_match dublin comment_any_canton comment_franzspr {
	bysort pers_id date_request cant wunschkanton dup: egen `x'_ = max(`x')
	* dup>0 instead of dup==1: groups with three or more copies are harmonised
	* as well; for the pairs found in the current data the result is unchanged
	replace `x' = `x'_ if dup > 0
	drop `x'_
}

drop dup
duplicates drop



* keep the cleaned request data in a temporary file for the merge below
tempfile t
save "`t'", replace

*****************************
** merge with ZEMIS file  **
****************************
*to get additional info on when case starts

use "$dat/ZEMIS_all.dta", clear

* shorter working names
rename (pz1_b_akt_jm auf_b_jm kt_kz stich_jm) (case_begin arrive cantass year)

* canton code: strip blanks and treat the placeholders ! and ? as missing
replace cantass = trim(cantass)
replace cantass = "" if inlist(cantass, "!", "?")

* reference period: keep only the leading digits (values appear to be YYYYMM)
replace year = int(year / 100)

* arrival: exact month as a monthly date, then reduce arrive to the year part
replace arrive = . if arrive == 207012 // means no info
tostring arrive, generate(temp1_)
generate temp1 = date(temp1_, "YM")
generate marrive_exact = mofd(temp1)
format marrive_exact %tm
replace arrive = int(arrive / 100)

* case start: exact month, month of the year, then reduce case_begin to the year part
replace case_begin = . if case_begin == 207012
tostring case_begin, generate(temp2_)
generate temp2 = date(temp2_, "YM")
generate mcase_begin_exact = mofd(temp2)
format mcase_begin_exact %tm
generate mcase_begin = mod(case_begin, 100) // just month
replace case_begin = int(case_begin / 100)

* remove the string and daily-date helpers created above
drop temp*

* record whether the canton was missing before the back-fill below
generate miss_cantass = missing(cantass)

* within person, ordered by year: if the first record has no canton and the
* case starts in October-December of that same year, take the canton of the
* next record
* NOTE(review): rows of one person that share the same year are ordered
* arbitrarily by this sort - confirm there is one row per person and year
bysort pers_id (year): replace cantass = cantass[_n+1] if _n == 1 & missing(cantass) & inlist(mcase_begin, 10, 11, 12) & year == case_begin // if arrive late and assigned the following year

* keep only the first record of each person
bysort pers_id (year): keep if _n == 1
rename year fyear

* one ZEMIS record per person, possibly many request records
merge 1:m pers_id using "`t'"

tabulate arrive if _merge == 1 // 99 % before 2008
tabulate year_request if _merge == 2 // 98% in 2017 and 2018
keep if _merge == 3
drop _merge

*****************************************************
** reduce sample to allocations within time window **
*****************************************************
rename pers_id id
* arrival year and case start year are both required
drop if missing(arrive, case_begin)

* compare first assignment date to case_start and arrival date
* num numbers the distinct request dates of a person in chronological order
bysort id (date_request): generate num = sum(date_request != date_request[_n-1])

* differences in months
* diff_start and diff_arrive are negative when the request month is later;
* diff_arrive_start is negative when arrival precedes the case start
generate diff_start = mcase_begin_exact - m_request
generate diff_arrive = marrive_exact - m_request
generate diff_arrive_start = marrive_exact - mcase_begin_exact


tabulate diff_start if num == 1 // 93 % assigned within 3 months (0 to -3)
tabulate diff_arrive if num == 1 // 97 % assigned within 3 months
tabulate diff_arrive_start if num == 1 // 96 % arrive same month

*drop individuals with any assignments before case_starts (maybe pre-processed), even if they also have assignment within correct window
* temp is the largest diff_start of the person; it is positive as soon as one
* request lies before the case start
bysort id: egen temp = max(diff_start)
drop if temp > 0 & !missing(temp)

* assigned within correct window
keep if inlist(diff_arrive_start, -1, 0) // can arrive 1 month before case starts
keep if inlist(diff_start, -3, -2, -1, 0) // can be assigned up to 3 months after case starts

*individuals with multiple cantons in the assignment period
* sum_cant is a running count of distinct cantons within person (records with
* cant equal to ! are skipped); sum_cant_tot is its person-level maximum
bysort id (cant): generate sum_cant = sum(cant != cant[_n-1]) if cant != "!"
bysort id: egen sum_cant_tot = max(sum_cant)
drop if missing(sum_cant_tot) // individuals that are only cant=!, 30 obs
tabulate sum_cant_tot if num == 1 // 96 % only one canton
generate multiple_cant_tot = (sum_cant_tot != 1)

** german french
* 1 if any record of the person carries the respective language flag
bysort id: egen german_info = max(comment_deutschspr)
bysort id: egen french_info = max(comment_franzspr)

*no cant mentioned in assignment period (irrespective if fulfilled or not)
* egen total() is the documented name of the group sum (formerly egen sum());
* it is not the running sum() used with generate above
bysort id: egen nocomcant_info = total(comment_any_canton)
replace nocomcant_info = (nocomcant_info == 0)

* nowunsch_info is 1 if no record of the person has nowunsch equal to 0
generate wunschreq = (nowunsch == 0)
bysort id: egen nowunsch_info = total(wunschreq)
replace nowunsch_info = (nowunsch_info == 0)


*canton match during assignment period
generate cantreq_ = ((comment_canton_match == 1) | (variable_canton_match == 1))
bysort id: egen cantreq = max(cantreq_) // at least one match in assignment period

tabulate cantass if cantreq_ == 1 & cantass == cant // 52 000
tabulate cantass if cantreq_ == 1 & cantass != cant, missing // 1 650
tabulate cantass if cantreq_ == 1 & cantass != cant & multiple_cant_tot != 1, missing // 350, so most errors due to change of canton later on
drop cantreq_

*for multiple cant assignments during assignment period use the first one
* individuals with only cant==! have been dropped
* assume info in cant more correct than cantass
drop if cant == "!"

* renumber the request dates after the drops and keep the earliest date only
drop num
bysort id (date_request): generate num = sum(date_request != date_request[_n-1])
keep if num == 1

keep id date_request cant multiple_cant_tot nocomcant_info nowunsch_info cantreq german_info french_info cantass
duplicates drop

* persons that still have several records differ in cant:
* prefer the record whose cant equals the ZEMIS canton, if there is one
duplicates tag id, generate(n_dup)
tabulate n_dup // 1 % of obs different canton
bysort id: egen has_match = max(cant == cantass)
drop if n_dup != 0 & cant != cantass & has_match == 1 // keep the obs that correspond to assignment canton
drop has_match n_dup

* remaining ties: keep the first canton in sort order
duplicates tag id, generate(n_dup)
tabulate n_dup // 18 obs
sort id cant // will drop in alphabetic order
duplicates drop id, force
drop n_dup

drop cantass // from zemis file
rename cant cantass // from comment file

rename id id_orig
save "$dat/comments.dta", replace

